/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.neu.ccs.pyramid.esplugins;

import org.apache.lucene.index.*;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.TermSpans;

import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

/**
 * Matches spans containing a term.
 * This should not be used for terms that are indexed at position Integer.MAX_VALUE.
 *
 * <p>When built through {@link #CustomSpanTermQuery(Term)}, a term whose text is wrapped in
 * angle brackets (for example {@code <NN>}) is rewritten to a term on the field named
 * {@code field + "_tags"} whose text is the content between the brackets.
 */
public class CustomSpanTermQuery extends CustomSpanQuery {

    /** Suffix appended to the field name when the term text is a bracketed tag. */
    private static final String TAG_FIELD_SUFFIX = "_tags";

    /**
     * A guess of
     * the relative cost of dealing with the term positions
     * when using a SpanNearQuery instead of a PhraseQuery.
     */
    private static final float PHRASE_TO_SPAN_TERM_POSITIONS_COST = 4.0f;

    private static final int TERM_POSNS_SEEK_OPS_PER_DOC = 128;

    private static final int TERM_OPS_PER_POS = 7;

    /** The term actually searched for (after any tag rewriting done by the constructor). */
    protected final Term term;

    /** Optional pre-built context supplied by the caller; {@code null} when none was given. */
    protected final TermContext termContext;

    /**
     * Construct a CustomSpanTermQuery matching the named term's spans.
     *
     * <p>If the term text has the form {@code <X>}, the query targets the term {@code X} in the
     * field {@code term.field() + "_tags"} instead of the given term.
     *
     * @param term the term to match; must not be {@code null}
     * @throws NullPointerException if {@code term} is {@code null}
     */
    public CustomSpanTermQuery(Term term) {
        // Validate before the first dereference so a null argument fails with a clear message.
        Objects.requireNonNull(term, "term must not be null");
        final String text = term.text();
        this.term = isPosTag(text)
                ? new Term(term.field() + TAG_FIELD_SUFFIX, getPosTag(text))
                : term;
        this.termContext = null;
    }

    /**
     * Expert: Construct a CustomSpanTermQuery matching the named term's spans, using
     * the provided TermContext. The term is used exactly as given; no tag rewriting is applied.
     *
     * @param term    the term to match; must not be {@code null}
     * @param context the term context to reuse; may be {@code null}
     * @throws NullPointerException if {@code term} is {@code null}
     */
    public CustomSpanTermQuery(Term term, TermContext context) {
        this.term = Objects.requireNonNull(term, "term must not be null");
        this.termContext = context;
    }

    /**
     * Tells whether the text starts with {@code '<'} and ends with {@code '>'}.
     * Text shorter than two characters (including the empty string) is never a tag.
     */
    private static boolean isPosTag(String s) {
        final int length = s.length();
        // The length guard keeps charAt(0) from throwing on empty term text.
        return length >= 2 && s.charAt(0) == '<' && s.charAt(length - 1) == '>';
    }

    /** Strips the first and last character (the surrounding angle brackets) from the text. */
    private static String getPosTag(String s) {
        return s.substring(1, s.length() - 1);
    }

    /** Return the term whose spans are matched. */
    public Term getTerm() {
        return term;
    }

    @Override
    public String getField() {
        return term.field();
    }

    /**
     * Creates the weight for this query.
     *
     * <p>The stored {@link TermContext} is reused only if it exists and was built for the
     * searcher's top-level reader context; otherwise a new one is built for that context.
     * The term-to-context map is handed to the weight only when {@code needsScores} is true;
     * otherwise {@code null} is passed.
     */
    @Override
    public CustomSpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
        final IndexReaderContext topContext = searcher.getTopReaderContext();
        final TermContext context;
        if (termContext == null || termContext.wasBuiltFor(topContext) == false) {
            context = TermContext.build(topContext, term);
        } else {
            context = termContext;
        }
        return new CustomSpanTermWeight(context, searcher, needsScores ? Collections.singletonMap(term, context) : null);
    }

    /** Weight for {@link CustomSpanTermQuery}; produces {@link TermSpans} for each leaf reader. */
    public class CustomSpanTermWeight extends CustomSpanWeight {

        /** Term context used to look up the per-leaf term state. */
        final TermContext termContext;

        public CustomSpanTermWeight(TermContext termContext, IndexSearcher searcher, Map<Term, TermContext> terms) throws IOException {
            super(CustomSpanTermQuery.this, searcher, terms);
            this.termContext = termContext;
            assert termContext != null : "TermContext must not be null";
        }

        @Override
        public void extractTerms(Set<Term> terms) {
            terms.add(term);
        }

        @Override
        public void extractTermContexts(Map<Term, TermContext> contexts) {
            contexts.put(term, termContext);
        }

        /**
         * Returns the spans of the term in the given leaf.
         *
         * @return the term's spans, or {@code null} if there is no term state for this leaf
         *         or the leaf has no terms for the field
         * @throws IllegalStateException if the field was indexed without positions
         */
        @Override
        public Spans getSpans(final LeafReaderContext context, Postings requiredPostings) throws IOException {

            assert termContext.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context) + ")";

            final TermState state = termContext.get(context.ord);
            if (state == null) { // term is not present in that reader
                assert context.reader().docFreq(term) == 0 : "no termstate found but term exists in reader term=" + term;
                return null;
            }

            final Terms terms = context.reader().terms(term.field());
            if (terms == null) {
                return null;
            }
            if (terms.hasPositions() == false) {
                throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run CustomSpanTermQuery (term=" + term.text() + ")");
            }

            // Position the enum on the term using the state already resolved for this leaf.
            final TermsEnum termsEnum = terms.iterator();
            termsEnum.seekExact(term.bytes(), state);

            final PostingsEnum postings = termsEnum.postings(null, requiredPostings.getRequiredPostings());
            final float positionsCost = termPositionsCost(termsEnum) * PHRASE_TO_SPAN_TERM_POSITIONS_COST;
            return new TermSpans(getSimScorer(context), postings, term, positionsCost);
        }
    }

    /** Returns an expected cost in simple operations
     *  of processing the occurrences of a term
     *  in a document that contains the term.
     *  <br>This may be inaccurate when {@link TermsEnum#totalTermFreq()} is not available.
     *  @param termsEnum The term is the term at which this TermsEnum is positioned.
     *  <p>
     *  This is a copy of org.apache.lucene.search.PhraseQuery.termPositionsCost().
     *  <br>
     *  TODO: keep only a single copy of this method and the constants used in it
     *  when CustomSpanTermQuery moves to the o.a.l.search package.
     */
    static float termPositionsCost(TermsEnum termsEnum) throws IOException {
        int docFreq = termsEnum.docFreq();
        assert docFreq > 0;
        long totalTermFreq = termsEnum.totalTermFreq(); // -1 when not available
        // Fall back to one occurrence per document when the total frequency is unknown or too small.
        float expOccurrencesInMatchingDoc = (totalTermFreq < docFreq) ? 1 : (totalTermFreq / (float) docFreq);
        return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS;
    }

    /**
     * Renders the query as text: only the term text when the term's field equals {@code field},
     * otherwise the full {@link Term#toString()} form.
     */
    @Override
    public String toString(String field) {
        return term.field().equals(field) ? term.text() : term.toString();
    }

    @Override
    public int hashCode() {
        return classHash() ^ term.hashCode();
    }

    /** Two queries are equal when they are of the same class and match the same term. */
    @Override
    public boolean equals(Object other) {
        return sameClassAs(other) &&
            term.equals(((CustomSpanTermQuery) other).term);
    }

}
